set.seed(36)
rm(list=ls())
library(psych)
library(GPArotation)
library(stargazer)
library(corrplot)
library(reshape2)
library(broom)
library(AER)
library(multiwayvcov)
library(cluster)
library(factoextra)
library(stargazer)
hh <- read.csv("HH data CLEAN FULL - with wattage and consumption.csv") #Load data from SPI

###Subset to variables we will use

dt <- dplyr::select(hh, q701a_wtp_pack1, q701b_wtp_pack2, 
                    q701c_wtp_pack3, q701d_wtp_pack4, 
                    q501_A_a_num_Incadesbulb, Incadesbulb_watt_new, 
                    q501_B_a_num_CFL, CFL_watt_new, 
                    q501_C_a_num_LED, LED_watt_new, 
                    q501_D_a_num_tubelight, tubelight_watt_new, 
                    q501_E_a_num_mobile, mobile_watt_new, 
                    q501_F_a_num_ceilingfan, ceilingfan_watt_new, 
                    q501_G_a_num_tablefan, tablefan_watt_new, 
                    q501_H_a_num_tv, tv_watt_new, 
                    q501_I_a_num_cooler, cooler_watt_new, 
                    q501_J_a_num_elec_stove, elec_stove_watt_new, 
                    q501_K_a_num_comp_laptop, comp_laptop_watt_new, 
                    q501_L_a_num_fridge, fridge_watt_new, 
                    q501_M_a_num_iron, iron_watt_new, 
                    q501_N_a_num_grinder, grinder_watt_new, 
                    q501_O_a_num_music_sys, music_sys_watt_new, 
                    q501_P_a_num_ac, ac_watt_new, 
                    q501_Q_a_num_washing_mc, grinder_watt_new, 
                    q501_R_a_num_fodder_mc, fodder_mc_watt_new, 
                    q501_S_a_num_water_pump, water_pump_watt_new, 
                    q501_T_a_num_others, others_watt_new, 
                    q202_age, q208_edu, 
                    q209_religion, q210_caste, 
                    q215_annual_income, q301_a_grid_hamlet, 
                    q008_district_code, q011_village_code, 
                    q204_adults, q206_children, 
                    q308_grid_hours, q214_hh_expenses, 
                    Intervention, q800_a_leadgroup, 
                    q800_b_participate_community, q800_c_dislike_risks,
                    q800_d_stock_monthlyexp, q800_e_prefer_cheapovrquality, 
                    q800_f_buy_knownbrands, q301_grid_use, 
                    q335_mgrid_use, q421_a_light_satisfied, q006_state_code)


###Variable creation/cleaning
dt$house_size <- dt$q204_adults + dt$q206_children #add together for full house size

dt$education <- as.factor(dt$q208_edu) #create factors

dt$caste <- as.factor(dt$q210_caste) #create factors

##Village grid hour variable

dt$q308_grid_hours[is.na(dt$q308_grid_hours)] <- 0 #NA means 0 grid hours

vil_grid_hr <- group_by(dt, q011_village_code) %>% summarize(mean(q308_grid_hours)) #create village level gridhour data

dt <- merge(dt, vil_grid_hr, by="q011_village_code") #merge into dataset

dt$vil_grid_hr <- dt$`mean(q308_grid_hours)`#just renaming  ##done

dt$expenses <- log(dt$q214_hh_expenses+1) #create log expenses variable

dt$q301_grid_use[dt$q301_grid_use==99] <- 0 # 99 is 0- have connection but no electricity

##attitude factor creation##

attitude <- dplyr::select(dt, q800_a_leadgroup, q800_b_participate_community, 
                          q800_c_dislike_risks, q800_d_stock_monthlyexp, 
                          q800_e_prefer_cheapovrquality, q800_f_buy_knownbrands)


fac <- fa(attitude,nfactors = 3,rotate = "oblimin",fm="minres") #create the factors

#MR1:Leadership, MR2:Cheap, MR3:Risk aversion

##Appliance ownership variable creation##

dt_z <- dt #create new to store nas as 0s for appliance
dt_z[is.na(dt_z)] <- 0 #need to create appliance data. assume NAs are 0

dt_z$watt_appliance <- with(dt_z, q501_A_a_num_Incadesbulb * Incadesbulb_watt_new+
                              q501_B_a_num_CFL * CFL_watt_new+
                              q501_C_a_num_LED * LED_watt_new+
                              q501_D_a_num_tubelight * tubelight_watt_new+
                              q501_E_a_num_mobile * mobile_watt_new+
                              q501_F_a_num_ceilingfan * ceilingfan_watt_new+
                              q501_G_a_num_tablefan * tablefan_watt_new+
                              q501_H_a_num_tv * tv_watt_new+
                              q501_I_a_num_cooler * cooler_watt_new+
                              q501_J_a_num_elec_stove * elec_stove_watt_new+
                              q501_K_a_num_comp_laptop * comp_laptop_watt_new+
                              q501_L_a_num_fridge * fridge_watt_new+
                              q501_M_a_num_iron * iron_watt_new+
                              q501_N_a_num_grinder * grinder_watt_new+
                              q501_O_a_num_music_sys * music_sys_watt_new+
                              q501_P_a_num_ac * ac_watt_new+
                              q501_R_a_num_fodder_mc * fodder_mc_watt_new+
                              q501_S_a_num_water_pump * water_pump_watt_new+
                              q501_T_a_num_others * others_watt_new
)


##Create dataframe for segmentation 

seg <- dplyr::select(dt_z, q701d_wtp_pack4, q421_a_light_satisfied, watt_appliance) #Note that all NAs here are 0s- this is necessary for the clustering. We drop NAs for robustness later
#this is intentional and noted in manuscript

analysis_scale <- scale(seg, center = T) #scale it


##Now cluster analysis: 3 steps
#1. find correct number of clusters
#2. create the clusters
#3. create the clustering variables


##step 1
##Visualize the correct number #uncomment to run
gap_stat2 <- clusGap(analysis_scale, FUN = kmeans, K.max = 10, nstart=25, B = 500) #3 clusters

print(gap_stat2, method = "firstSEmax")
#Plot


############
##FIGURE 2##
############

fviz_gap_stat(gap_stat2,  maxSE = list(method = "firstSEmax"))



##step 2
set.seed(36); clust_3 <- kmeans(analysis_scale, 3, nstart = 25)
clust_3


#step 3
clustered <- analysis_scale
clustered <- cbind(clustered, clusterNum = clust_3$cluster)
clustered <- as.data.frame(clustered)

clustered$clusterNum[clustered$clusterNum==2] <- 0 #recode cluster numbers for ease of interp.
clustered$clusterNum[clustered$clusterNum==3] <- 2
clustered$clusterNum[clustered$clusterNum==0] <- 3

##create cluster level summary statistics

wtp <- group_by(clustered, clusterNum) %>% summarize(mean(q701d_wtp_pack4))

satis <-  group_by(clustered, clusterNum) %>% summarize(mean(q421_a_light_satisfied))

watt <- group_by(clustered, clusterNum) %>% summarize(mean(watt_appliance))

hist <- merge(wtp, satis, by="clusterNum")
hist <- merge(hist, watt, by="clusterNum")


hist_melt <- melt(hist, id="clusterNum", 
                  measure.vars = c("mean(q701d_wtp_pack4)",   "mean(q421_a_light_satisfied)",   "mean(watt_appliance)" ))



##Now create the dataframe for analysis

full <- dplyr::select(dt, q202_age, education, q209_religion, caste, q215_annual_income, q006_state_code, q011_village_code, house_size, vil_grid_hr, expenses, q301_grid_use, q335_mgrid_use, q421_a_light_satisfied, Intervention)
full <- cbind(full, fac$scores)
full$leader <- full$MR1 #create factor variables
full$cheap <- full$MR2
full$risk_averse <- full$MR3
full$watt_appliance <- seg$watt_appliance

full <- cbind(full, clustered$clusterNum)
full$clusterNum <- full$`clustered$clusterNum`



full$high_use <- as.numeric(full$`clustered$clusterNum`==3)

full$low_use <- as.numeric(full$`clustered$clusterNum`==1)

full$med_use <- as.numeric(full$`clustered$clusterNum`==2)

full$hindu <- full$q209_religion==1
full$scheduled <- full$caste==3 | full$caste==4

full$Intervention <- dplyr::recode(full$Intervention, "Distribution franchise"= "Distribution Franchise")

full$Intervention <- relevel(full$Intervention, "None")

full$educat[full$education==1 | full$education==2] <- 1
full$educat[full$education==3 | full$education==4] <- 2
full$educat[full$education==5 | full$education==6] <- 3
full$educat <- as.factor(full$educat)

full$wtp <- dt_z$q701d_wtp_pack4

full$state <- hh$q007_state






###################################
##Create cluster summary stats df##
###################################
full$watt_Incadesbulb <-   dt_z$q501_A_a_num_Incadesbulb *   dt_z$Incadesbulb_watt_new
full$watt_CFL  <-   dt_z$q501_B_a_num_CFL *  dt_z$CFL_watt_new
full$watt_LED   <-   dt_z$q501_C_a_num_LED *  dt_z$LED_watt_new
full$watt_tubelight     <-   dt_z$q501_D_a_num_tubelight * dt_z$tubelight_watt_new
full$watt_mobile <-  dt_z$q501_E_a_num_mobile * dt_z$mobile_watt_new
full$watt_ceilingfan  <-   dt_z$q501_F_a_num_ceilingfan *   dt_z$ceilingfan_watt_new
full$watt_tablefan   <-   dt_z$q501_G_a_num_tablefan * dt_z$tablefan_watt_new
full$watt_tv <- dt_z$q501_H_a_num_tv * dt_z$tv_watt_new                                 
full$watt_cooler   <-   dt_z$q501_I_a_num_cooler * dt_z$cooler_watt_new
full$watt_elec_stove   <-   dt_z$q501_J_a_num_elec_stove * dt_z$elec_stove_watt_new
full$watt_comp <- dt_z$q501_K_a_num_comp_laptop * dt_z$comp_laptop_watt_new
full$watt_fridge <- dt_z$q501_L_a_num_fridge * dt_z$fridge_watt_new
full$watt_iron    <-   dt_z$q501_M_a_num_iron * dt_z$iron_watt_new
full$watt_grinder    <-   dt_z$q501_N_a_num_grinder * dt_z$grinder_watt_new
full$watt_music <- dt_z$q501_O_a_num_music_sys * dt_z$music_sys_watt_new
full$watt_ac <- dt_z$q501_P_a_num_ac * dt_z$ac_watt_new
full$watt_fodder_mc   <-   dt_z$q501_R_a_num_fodder_mc * dt_z$fodder_mc_watt_new
full$watt_pump <- dt_z$q501_S_a_num_water_pump * dt_z$water_pump_watt_new                           
full$watt_others     <-   dt_z$q501_T_a_num_others * dt_z$others_watt_new

full_sum <- dplyr::select(full, wtp, q421_a_light_satisfied, watt_appliance, q202_age, 
                          educat, hindu, scheduled, expenses, house_size, vil_grid_hr, 
                          leader, cheap, risk_averse, q301_grid_use, q335_mgrid_use, 
                          watt_Incadesbulb, watt_CFL, watt_LED, watt_tubelight, watt_mobile, 
                          watt_ceilingfan, watt_tablefan, watt_tv, watt_cooler, watt_elec_stove, 
                          watt_comp, watt_iron, watt_grinder, watt_music, watt_ac, watt_fodder_mc, 
                          watt_pump, watt_others, clusterNum) 

full_sum$Uneducated <- full_sum$educat==1

full_sum$Schooling <- full_sum$educat==2

full_sum$Higher_ed <- full_sum$educat==3


full_sum_l <- full_sum %>% dplyr::select(wtp, q421_a_light_satisfied, 
                                         watt_appliance, q202_age, Uneducated, Schooling, Higher_ed, 
                                         hindu, scheduled, expenses, house_size, vil_grid_hr, leader, 
                                         cheap, risk_averse, q301_grid_use, q335_mgrid_use)


full_sum <- full_sum %>% dplyr::select(wtp, q421_a_light_satisfied, watt_appliance, 
                                       q202_age, Uneducated, Schooling, Higher_ed, hindu, 
                                       scheduled, expenses, house_size, vil_grid_hr, leader, 
                                       cheap, risk_averse, q301_grid_use, q335_mgrid_use, 
                                       watt_Incadesbulb, watt_CFL, watt_LED, watt_tubelight, 
                                       watt_mobile, watt_ceilingfan, watt_tablefan, watt_tv, 
                                       watt_cooler, watt_elec_stove, watt_comp, watt_iron, 
                                       watt_grinder, watt_music, watt_ac, watt_fodder_mc, 
                                       watt_pump, watt_others, clusterNum)


#########################################
##TABLES: Summary statistics by cluster##
#########################################

###########
##TABLE 1##
###########
stargazer(full_sum_l, type="latex", summary.stat = c("mean", "sd", "min", "max"), float=F, covariate.labels  =c("Willingness to Pay", "Satisfaction with Light", "Appliance Wattage", "Age",  "Uneducated?", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste or Tribe?", "Monthly Household Expenses (log)", "Household Size", "Village Hours of Grid Electricity", "Leader?", "Cheap?", "Risk Averse?", "Grid Connection?", "Mini-grid Connection?"))

############
##TABLE S1##
############

stargazer(full_sum[full_sum$clusterNum==1,], type="latex", float=F,  column.sep.width = "-5pt", covariate.labels  =c("Willingness to Pay", "Satisfaction with Light", "Appliance Wattage", "Age", "Uneducated?", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste or Tribe?", "Monthly Household Expenses", "Household Size", "Village Hours of Grid Electricity", "Leader?", "Cheap?", "Risk Averse?", "Grid Connection?", "Mini-grid Connection?", "Incandescent Bulb Wattage", "CFL Wattage", "LED Wattage", "Tubelight Wattage", "Mobile Wattage", "Ceiling Fan Wattage", "Table Fan Wattage", "TV Wattage", "Cooler Wattage", "Electric Stove Wattage", "Computer Wattage", "Iron Wattage", "Grinder Wattage", "Music Player Wattage", "Aircon Wattage", "Fodder Cutter Wattage", "Water Pump Wattage", "Other Wattage", "Cluster Number"))

############
##TABLE S2##
############
stargazer(full_sum[full_sum$clusterNum==2,], type="latex", float=F,  column.sep.width = "-5pt", covariate.labels  =c("Willingness to Pay", "Satisfaction with Light", "Appliance Wattage", "Age", "Uneducated?", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste or Tribe?", "Monthly Household Expenses", "Household Size", "Village Hours of Grid Electricity", "Leader?", "Cheap?", "Risk Averse?", "Grid Connection?", "Mini-grid Connection?", "Incandescent Bulb Wattage", "CFL Wattage", "LED Wattage", "Tubelight Wattage", "Mobile Wattage", "Ceiling Fan Wattage", "Table Fan Wattage", "TV Wattage", "Cooler Wattage", "Electric Stove Wattage", "Computer Wattage", "Iron Wattage", "Grinder Wattage", "Music Player Wattage", "Aircon Wattage", "Fodder Cutter Wattage", "Water Pump Wattage", "Other Wattage", "Cluster Number"))

############
##TABLE S3##
############
stargazer(full_sum[full_sum$clusterNum==3,], type="latex", float=F,  column.sep.width = "-5pt", omit.table.layout = "d", covariate.labels  =c("Willingness to Pay", "Satisfaction with Light", "Appliance Wattage", "Age", "Uneducated?", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste or Tribe?", "Monthly Household Expenses", "Household Size", "Village Hours of Grid Electricity", "Leader?", "Cheap?", "Risk Averse?", "Grid Connection?", "Mini-grid Connection?", "Incandescent Bulb Wattage", "CFL Wattage", "LED Wattage", "Tubelight Wattage", "Mobile Wattage", "Ceiling Fan Wattage", "Table Fan Wattage", "TV Wattage", "Cooler Wattage", "Electric Stove Wattage", "Computer Wattage", "Iron Wattage", "Grinder Wattage", "Music Player Wattage", "Aircon Wattage", "Fodder Cutter Wattage", "Water Pump Wattage", "Other Wattage", "Cluster Number"))



#######################################
##FIGURES: SUMMARY FIGURES BY CLUSTER##
#######################################

############
##FIGURE 3##
############
ggplot(hist_melt, 
       aes(clusterNum, value, fill=variable, 
           label=round(value,digits=1)))+
  geom_bar(stat="identity", position="dodge")+
  xlab("Cluster")+
  ylab("Average Scaled Value")+
  scale_fill_discrete( name = "Variables", labels = c("WTP", "Satisfaction", "Appliance Wattage"))+
  geom_text(position = position_dodge(width = 1), vjust=-0.5, size=3)



#############
##FIGURE S2##
#############
ggplot(clustered, aes(x=clusterNum))+geom_bar()+xlab("Cluster")+ylab("Number of Observations")


##Bivariate plots of cluster characteristics

############
##FIGURE 5##
############  #combines next 3 plots


ggplot(data = full, aes(x = wtp, y = q421_a_light_satisfied, color = as.factor(clusterNum), label=q421_a_light_satisfied)) + 
  geom_point() + geom_jitter(height = .01, alpha = .2)+
  scale_color_discrete( name = "Segments", labels = c("Potential Customer", "Low Demand", "High Use"))+
  xlab("WTP") + 
  ylab("Satisfaction")




ggplot(full, aes(x = q421_a_light_satisfied, y = watt_appliance, color = as.factor(clusterNum), label=watt_appliance)) + 
  geom_point() + geom_jitter(height = .01, alpha = .2)+
  scale_color_discrete( name = "Segments",  labels = c("Potential Customer", "Low Demand", "High Use"))+
  xlab("Satisfaction") + 
  ylab("Appliance Wattage")




ggplot(full, aes(x = watt_appliance, y = wtp, color = as.factor(clusterNum))) + 
  geom_point() + 
  geom_jitter(height = .01, alpha = .2)+
  scale_color_discrete( name = "Segments",  labels = c("Potential Customer", "Low Demand", "High Use"))+xlab("Appliance Wattage") + ylab("WTP")




#Create cluster histograms of clustering variables

f_wtp <- group_by(full, clusterNum) %>% summarize(mean(wtp))

f_satis <-  group_by(full, clusterNum) %>% summarize(mean(q421_a_light_satisfied))

f_watt <- group_by(full, clusterNum) %>% summarize(mean(watt_appliance))

f_hist <- merge(f_wtp, f_satis, by="clusterNum")
f_hist <- merge(f_hist, f_watt, by="clusterNum")

f_hist$wtp <- f_hist$`mean(wtp)`
f_hist$sat <- f_hist$`mean(q421_a_light_satisfied)`
f_hist$watt <- f_hist$`mean(watt_appliance)`

############
##FIGURE 4##
############  #combines next 3 plots


ggplot(f_hist, aes(as.factor(clusterNum), y=wtp, fill=as.factor(clusterNum), label=round(wtp, digits=1)))+
  geom_bar(stat="identity")+
  geom_text(position = position_stack(vjust = 0.5), size=3)+
  scale_fill_discrete(guide=FALSE)+xlab("Cluster")+ylab("WTP")






ggplot(f_hist, aes(as.factor(clusterNum), y=sat, fill=as.factor(clusterNum), label=round(sat, digits=1)))+
  geom_bar(stat="identity")+
  geom_text(position = position_stack(vjust = 0.5), size=3)+
  scale_fill_discrete(guide=FALSE)+xlab("Cluster")+ylab("Light Satisfaction")




ggplot(f_hist, aes(as.factor(clusterNum), y=watt, fill=as.factor(clusterNum), label=round(watt, digits=1)))+
  geom_bar(stat="identity")+
  geom_text(position = position_stack(vjust = 0.5), size=3)+
  scale_fill_discrete(guide=FALSE)+xlab("Cluster")+ylab("Appliance Wattage")




###################
#MULTINOMIAL LOGIT#
###################
###Multinomial logit###
full_drop <- droplevels(full) #get rid of unused factors in dataframe
full_drop$hindu <- as.factor(as.numeric(full_drop$hindu))
full_drop$scheduled <- as.factor(as.numeric(full_drop$scheduled))



full_Drop <- full_drop %>% rename(Cluster = clusterNum, Age = q202_age, Education = educat, Hindu = hindu,  Scheduled_Caste = scheduled, Log_Expenses =  expenses, Household_Size =  house_size, Village_Grid_Hours = vil_grid_hr, Leader =  leader, Cheap = cheap, Risk_Averse = risk_averse, Grid_Connection = q301_grid_use, Mini_Grid_Connection = q335_mgrid_use)

full_Drop$fstate <- as.factor(full_Drop$q006_state_code)
#model for plotting effects
mn3 <- multinom(Cluster ~ Age + Education + Hindu + Scheduled_Caste + Log_Expenses  + Household_Size + Village_Grid_Hours + Leader + Cheap+ Risk_Averse + Grid_Connection + Mini_Grid_Connection + fstate , data=full_Drop)


cov.list.mn <- c("Age", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste/Tribe?", "Log Expenses", "Household Size", "Village Grid Hours", "Leader", "Cheap", "Risk Averse", "Grid Connection?", "Mini-Grid Connection?")
# Substitution of coefficients

############
##TABLE S7##
############
stargazer(title = "Determinants of Segment Membership",
          
          header = F,
          mn3,
          covariate.labels = cov.list.mn,
          dep.var.labels = c("Low Demand", "High Use"),
          omit.stat =  c("rsq","ser","f"),
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit = c("fstate")
          
)
sink()

###create figures of effects

##############################
##MNL EFFECT PLOTS- APPENDIX##
##############################


plot(effect("Age", mn3))



plot(effect("Education", mn3))



plot(effect("Hindu", mn3))



plot(effect("Scheduled_Caste", mn3))



plot(effect("Log_Expenses", mn3))



plot(effect("Household_Size", mn3))




plot(effect("Village_Grid_Hours", mn3))



plot(effect("Leader", mn3))



plot(effect("Cheap", mn3))



plot(effect("Risk_Averse", mn3))



plot(effect("Grid_Connection", mn3))



plot(effect("Mini_Grid_Connection", mn3))



#######
##OLS##
#######
####HIGH USE MODELS###
##Model 1: village/socioecon

m1_h <- lm(high_use ~ q202_age + educat + hindu + scheduled  + house_size   + as.factor(q006_state_code), data=full)
m1_se_h <-coeftest(m1_h, cluster.vcov(m1_h, as.factor(full$q011_village_code)))

##Model 2: with psychology

m2_h <- lm(high_use ~ q202_age + educat + hindu + scheduled  + house_size  + leader + cheap+ risk_averse + as.factor(q006_state_code), data=full)
m2_se_h <-coeftest(m2_h, cluster.vcov(m2_h, as.factor(full$q011_village_code)))


##Model 3: Intervention
m3_h <- lm(high_use ~ q202_age + educat + hindu + scheduled + expenses  + house_size + vil_grid_hr + leader + cheap+ risk_averse + q301_grid_use + q335_mgrid_use  + as.factor(q006_state_code), data=full)
m3_se_h <-coeftest(m3_h, cluster.vcov(m3_h, as.factor(full$q011_village_code)))

###########
##TABLE 4##
###########

fit.list_h <- list(m1_h, m2_h, m3_h)
se.list_h <- list(m1_se_h[,2], m2_se_h[,2], m3_se_h[,2])
p.list_h <- list(m1_se_h[,4], m2_se_h[,4], m3_se_h[,4])
cov.list <- c("Age", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste/Tribe?", "Log Expenses", "Household Size", "Village Grid Hours", "Leader", "Cheap", "Risk Averse", "Grid Connection?", "Mini-Grid Connection?")
omit.stats <- c("rsq","ser","f")
dep.var.list <- c("High Use", "High Use", "High Use")



stargazer(title = "Determinants of Segment Membership",
          se = se.list_h,
          p = p.list_h,
          header = F,
          fit.list_h,
          covariate.labels = cov.list,
          dep.var.labels = "High Use",
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit = c("as.factor"),
          add.lines=list(c("State Fixed Effects?", "Yes", "Yes", "Yes")),
          notes = "Standard errors clustered at village level."
)


####LOW USE MODELS###
##Model 1: village/socioecon

m1_l <- lm(low_use ~ q202_age + educat + hindu + scheduled  + house_size  + as.factor(q006_state_code), data=full)
m1_se_l <-coeftest(m1_l, cluster.vcov(m1_l, as.factor(full$q011_village_code)))

##Model 2: with psychology

m2_l <- lm(low_use ~ q202_age + educat + hindu + scheduled  + house_size  + leader + cheap+ risk_averse + as.factor(q006_state_code), data=full)
m2_se_l <-coeftest(m2_l, cluster.vcov(m2_l, as.factor(full$q011_village_code)))


##Model 3: Intervention
m3_l <- lm(low_use ~ q202_age + educat + hindu + scheduled + expenses  + house_size + vil_grid_hr + leader + cheap+ risk_averse + q301_grid_use + q335_mgrid_use  + as.factor(q006_state_code), data=full)
m3_se_l <-coeftest(m3_l, cluster.vcov(m3_l, as.factor(full$q011_village_code)))

###########
##TABLE 2##
###########

fit.list_l <- list(m1_l, m2_l, m3_l)
se.list_l <- list(m1_se_l[,2], m2_se_l[,2], m3_se_l[,2])
p.list_l <- list(m1_se_l[,4], m2_se_l[,4], m3_se_l[,4])
cov.list <- c("Age", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste/Tribe?", "Log Expenses", "Household Size", "Village Grid Hours", "Leader", "Cheap", "Risk Averse", "Grid Connection?", "Mini-Grid Connection?")
omit.stats <- c("rsq","ser","f")
dep.var.list <- c("Potential Customer", "Potential Customer", "Potential Customer")


stargazer(title = "Determinants of Segment Membership",
          se = se.list_l,
          p = p.list_l,
          header = F,
          fit.list_l,
          covariate.labels = cov.list,
          dep.var.labels = "Potential Customer",
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit = c("as.factor"),
          add.lines=list(c("State Fixed Effects?", "Yes", "Yes", "Yes")),
          notes = "Standard errors clustered at village level."
)


####MED USE MODELS###
##Model 1: village/socioecon

m1_m <- lm(med_use ~ q202_age + educat + hindu + scheduled  + house_size   + as.factor(q006_state_code), data=full)
m1_se_m <-coeftest(m1_m, cluster.vcov(m1_m, as.factor(full$q011_village_code)))

##Model 2: with psychology

m2_m <- lm(med_use ~ q202_age + educat + hindu + scheduled  + house_size  + leader + cheap+ risk_averse + as.factor(q006_state_code), data=full)
m2_se_m <-coeftest(m2_m, cluster.vcov(m2_m, as.factor(full$q011_village_code)))


##Model 3: Intervention
m3_m <- lm(med_use ~ q202_age + educat + hindu + scheduled + expenses  + house_size + vil_grid_hr + leader + cheap+ risk_averse + q301_grid_use + q335_mgrid_use  + as.factor(q006_state_code), data=full)
m3_se_m <-coeftest(m3_m, cluster.vcov(m3_m, as.factor(full$q011_village_code)))

###########
##TABLE 3##
###########

fit.list_m <- list(m1_m, m2_m, m3_m)
se.list_m <- list(m1_se_m[,2], m2_se_m[,2], m3_se_m[,2])
p.list_m <- list(m1_se_m[,4], m2_se_m[,4], m3_se_m[,4])
cov.list <- c("Age", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste/Tribe?", "Log Expenses", "Household Size", "Village Grid Hours", "Leader", "Cheap", "Risk Averse", "Grid Connection?", "Mini-Grid Connection?")
omit.stats <- c("rsq","ser","f")
dep.var.list <- c("Low Demand", "Low Demand", "Low Demand")


stargazer(title = "Determinants of Segment Membership",
          se = se.list_m,
          p = p.list_m,
          header = F,
          fit.list_m,
          covariate.labels = cov.list,
          dep.var.labels = "Low Demand",
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit = c("as.factor"),
          add.lines=list(c("State Fixed Effects?", "Yes", "Yes", "Yes")),
          notes = "Standard errors clustered at village level."
)


##########
##Figure##
##########

##Figure: Cluster membership by state

############
##FIGURE 6##
############

ggplot(full, aes(as.factor(state), fill=as.factor(clusterNum)))+geom_bar()+
  scale_fill_discrete( name = "Cluster",  labels = c("Potential Customer", "Low Demand", "High Use"))+
  xlab("State")+ylab("Number of Individuals")+geom_text(stat='count', aes(label=..count..), position = position_stack(vjust = 0.5),size=3)




#Cluster membership by intervention
############
##FIGURE 7##
############
ggplot(full, aes(Intervention, fill=as.factor(clusterNum)))+
  geom_bar()+
  scale_fill_discrete( name = "Cluster",labels = c("Potential Customer", "Low Demand", "High Use"))+
  xlab("Intervention")+ylab("Number of Individuals")+geom_text(stat='count', aes(label=..count..), position = position_stack(vjust = 0.5),size=3)




#######################
###CORRELATION PLOTS###
#######################

seg_cor <- full %>% dplyr::select(q421_a_light_satisfied, wtp, watt_appliance) %>% cor(.)
colnames(seg_cor) <- c("Light Satisfaction", "WTP", "Appliance Wattage")
rownames(seg_cor) <- c("Light Satisfaction", "WTP", "Appliance Wattage")

############
##FIGURE 1##
############
corrplot(seg_cor, method="number")



wtp_cor <- dt %>% dplyr::select(q701a_wtp_pack1, q701b_wtp_pack2, q701c_wtp_pack3, q701d_wtp_pack4) %>% drop_na(.) %>% cor(.)
colnames(wtp_cor) <- c("WTP Low", "WTP Low-Mid", "WTP Mid-High", "WTP High")
rownames(wtp_cor) <- c("WTP Low", "WTP Low-Mid", "WTP Mid-High", "WTP High")

#############
##FIGURE S1##
#############
corrplot(wtp_cor, method="number")





##################################################
##ROBUSTNESS: DO ABOVE ANALYSIS DROPPING WTP NAs##
##################################################
hh_na <- hh %>% drop_na(q701d_wtp_pack4)
dt_na <- dplyr::select(hh_na, q701a_wtp_pack1, q701b_wtp_pack2, q701c_wtp_pack3, q701d_wtp_pack4, q501_A_a_num_Incadesbulb, Incadesbulb_watt_new, q501_B_a_num_CFL, CFL_watt_new, q501_C_a_num_LED, LED_watt_new, q501_D_a_num_tubelight, tubelight_watt_new, q501_E_a_num_mobile, mobile_watt_new, q501_F_a_num_ceilingfan, ceilingfan_watt_new, q501_G_a_num_tablefan, tablefan_watt_new, q501_H_a_num_tv, tv_watt_new, q501_I_a_num_cooler, cooler_watt_new, q501_J_a_num_elec_stove, elec_stove_watt_new, q501_K_a_num_comp_laptop, comp_laptop_watt_new, q501_L_a_num_fridge, fridge_watt_new, q501_M_a_num_iron, iron_watt_new, q501_N_a_num_grinder, grinder_watt_new, q501_O_a_num_music_sys, music_sys_watt_new, q501_P_a_num_ac, ac_watt_new, q501_Q_a_num_washing_mc, grinder_watt_new, q501_R_a_num_fodder_mc, fodder_mc_watt_new, q501_S_a_num_water_pump, water_pump_watt_new, q501_T_a_num_others, others_watt_new, q202_age, q208_edu, q209_religion, q210_caste, q215_annual_income, q301_a_grid_hamlet, q008_district_code, q011_village_code, q204_adults, q206_children, q308_grid_hours, q214_hh_expenses, Intervention, q800_a_leadgroup, q800_b_participate_community, q800_c_dislike_risks, q800_d_stock_monthlyexp, q800_e_prefer_cheapovrquality, q800_f_buy_knownbrands, q301_grid_use, q335_mgrid_use, q421_a_light_satisfied, q006_state_code)

dt_na$house_size <- dt_na$q204_adults + dt_na$q206_children

attitude_na <- dplyr::select(dt_na, q800_a_leadgroup, q800_b_participate_community, q800_c_dislike_risks, q800_d_stock_monthlyexp, q800_e_prefer_cheapovrquality, q800_f_buy_knownbrands)

dt_na$education <- as.factor(dt_na$q208_edu)

dt_na$caste <- as.factor(dt_na$q210_caste)


dt_na$q308_grid_hours[is.na(dt_na$q308_grid_hours)] <- 0

vil_grid_hr_na <- group_by(dt_na, q011_village_code) %>% summarize(mean(q308_grid_hours))

dt_na <- merge(dt_na, vil_grid_hr_na, by="q011_village_code")

dt_na$vil_grid_hr <- dt_na$`mean(q308_grid_hours)`

dt_na$expenses <- log(dt_na$q214_hh_expenses+1)

##attitude factor creation##

fa.parallel(attitude_na, fm = 'minres', fa = 'fa') #3 factors

fac_na <- fa(attitude_na,nfactors = 3,rotate = "oblimin",fm="minres") #create the factors

fac_na
#MR1:Leadership, MR2:Cheap, MR3:Risk aversion

full_na <- dplyr::select(dt_na, q202_age, education, q209_religion, caste, q215_annual_income, q006_state_code, q011_village_code, house_size, vil_grid_hr, expenses, q301_grid_use, q335_mgrid_use, q421_a_light_satisfied, Intervention, q701d_wtp_pack4)
full_na <- cbind(full_na, fac_na$scores)
full_na$leader <- full_na$MR1 #create factor variables
full_na$cheap <- full_na$MR2
full_na$risk_averse <- full_na$MR3



dt_na[is.na(dt_na)] <- 0 #NEEDED FOR APPLIANCE * USE- ASSUME NAS ARE 0
dt_na$q301_grid_use[dt_na$q301_grid_use==99] <- 1 #recode non electrified but connection to grid connection

seg_na <- dplyr::select(dt_na, q701d_wtp_pack4, q421_a_light_satisfied
)




seg_na$watt_appliance <- with(dt_na, q501_A_a_num_Incadesbulb * Incadesbulb_watt_new+
                                q501_B_a_num_CFL * CFL_watt_new+
                                q501_C_a_num_LED * LED_watt_new+
                                q501_D_a_num_tubelight * tubelight_watt_new+
                                q501_E_a_num_mobile * mobile_watt_new+
                                q501_F_a_num_ceilingfan * ceilingfan_watt_new+
                                q501_G_a_num_tablefan * tablefan_watt_new+
                                q501_H_a_num_tv * tv_watt_new+
                                q501_I_a_num_cooler * cooler_watt_new+
                                q501_J_a_num_elec_stove * elec_stove_watt_new+
                                q501_K_a_num_comp_laptop * comp_laptop_watt_new+
                                q501_L_a_num_fridge * fridge_watt_new+
                                q501_M_a_num_iron * iron_watt_new+
                                q501_N_a_num_grinder * grinder_watt_new+
                                q501_O_a_num_music_sys * music_sys_watt_new+
                                q501_P_a_num_ac * ac_watt_new+
                                q501_R_a_num_fodder_mc * fodder_mc_watt_new+
                                q501_S_a_num_water_pump * water_pump_watt_new+
                                q501_T_a_num_others * others_watt_new
)

full_na$watt_appliance <- seg_na$watt_appliance
# analysis$watt_appliance <- with(dt, q501_A_a_num_Incadesbulb * q501_A_b_watt_Incadesbulb+
#                                   q501_B_a_num_CFL * q501_B_b_watt_CFL+
#                                   q501_C_a_num_LED * q501_C_b_watt_LED+
#                                   q501_D_a_num_tubelight * q501_D_b_watt_tubelight+
#                                   q501_F_a_num_ceilingfan * q501_F_b_watt_ceilingfan+
#                                   q501_G_a_num_tablefan * q501_G_a_num_tablefan+
#                                   q501_H_a_num_tv * q501_H_b_watt_tv+
#                                   q501_I_a_num_cooler * q501_I_b_watt_cooler+
#                                   q501_J_a_num_elec_stove * q501_J_b_watt_elec_stove+
#                                   q501_K_a_num_comp_laptop * q501_K_b_watt_comp_laptop+
#                                   q501_L_a_num_fridge * q501_L_b_watt_fridge+
#                                   q501_M_a_num_iron * q501_M_b_watt_iron+
#                                   q501_N_a_num_grinder * q501_N_b_watt_grinder+
#                                   q501_O_a_num_music_sys * q501_O_b_watt_music_sys+
#                                   q501_P_a_num_ac * q501_P_b_watt_ac+
#                                   q501_Q_a_num_washing_mc * q501_Q_b_watt_washing_mc+
#                                   q501_R_a_num_fodder_mc * q501_R_b_watt_fodder_mc+
#                                   q501_S_a_num_water_pump * q501_S_b_watt_water_pump+
#                                   q501_T_a_num_others * q501_T_b_watt_others
#                                )



analysis_scale_na <- scale(seg_na, center = T) #scale it



gap_stat2_na <- clusGap(analysis_scale_na, FUN = kmeans,
 K.max = 10, nstart=25, B = 50)

print(gap_stat2_na, method = "firstSEmax")
# Plot

#############
##FIGURE S3##
#############

fviz_gap_stat(gap_stat2_na, 
 maxSE = list(method = "firstSEmax"))


set.seed(36); clust_na <- kmeans(analysis_scale_na, 2, nstart = 25)

clust_na

set.seed(36); clust_3_na <- kmeans(analysis_scale_na, 3, nstart = 25)
clust_3_na

fviz_cluster(clust_3_na, data = analysis_scale_na)


clustered_na <- analysis_scale_na
clustered_na <- cbind(clustered_na, clusterNum = clust_3_na$cluster)
clustered_na <- as.data.frame(clustered_na)

clustered_na$clusterNum[clustered_na$clusterNum==1] <- 0
clustered_na$clusterNum[clustered_na$clusterNum==3] <- 1
clustered_na$clusterNum[clustered_na$clusterNum==0] <- 3



full_na <- cbind(full_na, clustered_na$clusterNum)
full_na$clusterNum <- full_na$`clustered_na$clusterNum`

full_na$high_use <- as.numeric(full_na$`clustered_na$clusterNum`==3)

full_na$med_use <- as.numeric(full_na$`clustered_na$clusterNum`==2)

full_na$low_use <- as.numeric(full_na$`clustered_na$clusterNum`==1)




wtp_na <- group_by(clustered_na, clusterNum) %>% summarize(mean(q701d_wtp_pack4))

satis_na <-  group_by(clustered_na, clusterNum) %>% summarize(mean(q421_a_light_satisfied))

watt_na <- group_by(clustered_na, clusterNum) %>% summarize(mean(watt_appliance))

hist_na <- merge(wtp_na, satis_na, by="clusterNum")
hist_na <- merge(hist_na, watt_na, by="clusterNum")


hist_melt_na <- melt(hist_na, id="clusterNum", measure.vars = c("mean(q701d_wtp_pack4)",   "mean(q421_a_light_satisfied)",   "mean(watt_appliance)" ))

#############
##FIGURE S4##
#############
ggplot(hist_melt_na, aes(clusterNum, value, fill=variable))+geom_bar(stat="identity", position="dodge")+xlab("Cluster")+ylab("Average Scaled Value")+scale_color_discrete( name = "Variables", labels = c("WTP", "Satisfaction", "Appliance Wattage"))



full_na$hindu <- full_na$q209_religion==1
full_na$scheduled <- full_na$caste==3 | full_na$caste==4

full_na$Intervention <- dplyr::recode(full_na$Intervention, "Distribution franchise"= "Distribution Franchise")

full_na$Intervention <- relevel(full_na$Intervention, "None")

full_na$educat[full_na$education==1 | full_na$education==2] <- 1
full_na$educat[full_na$education==3 | full_na$education==4] <- 2
full_na$educat[full_na$education==5 | full_na$education==6] <- 3
full_na$educat <- as.factor(full_na$educat)



#only need 1 model- binary membership variable
full_na$wtp <- dt_na$q701d_wtp_pack4

#############
##FIGURE S6##
#############


ggplot(data = full_na, aes(x = wtp, y = q421_a_light_satisfied, color = as.factor(clusterNum))) + geom_point() + geom_jitter(height = .01, alpha = .2)+scale_color_discrete( name = "Segments",  labels = c("Potential Customer", "Low Demand", "High Use"))+xlab("WTP") + ylab("Satisfaction")



ggplot(full_na, aes(x = q421_a_light_satisfied, y = watt_appliance, color = as.factor(clusterNum))) + geom_point() + geom_jitter(height = .01, alpha = .2)+scale_color_discrete( name = "Segments",  labels = c("Potential Customer", "Low Demand", "High Use"))+xlab("Satisfaction") + ylab("Appliance Wattage")


ggplot(full_na, aes(x = watt_appliance, y = wtp, color = as.factor(clusterNum))) + geom_point() + geom_jitter(height = .01, alpha = .2)+scale_color_discrete( name = "Segments",  labels = c("Potential Customer", "Low Demand", "High Use"))+xlab("Appliance Wattage") + ylab("WTP")


f_wtp_na <- group_by(full_na, clusterNum) %>% summarize(mean(wtp))

f_satis_na <-  group_by(full_na, clusterNum) %>% summarize(mean(q421_a_light_satisfied))

f_watt_na <- group_by(full_na, clusterNum) %>% summarize(mean(watt_appliance))

f_hist_na <- merge(f_wtp_na, f_satis_na, by="clusterNum")
f_hist_na <- merge(f_hist_na, f_watt_na, by="clusterNum")

f_hist_na$wtp <- f_hist_na$`mean(wtp)`
f_hist_na$sat <- f_hist_na$`mean(q421_a_light_satisfied)`
f_hist_na$watt <- f_hist_na$`mean(watt_appliance)`

#############
##FIGURE S5##
#############
ggplot(f_hist_na, aes(as.factor(clusterNum), y=wtp, fill=as.factor(clusterNum)))+geom_bar(stat="identity")+scale_fill_discrete( name = "Cluster",  labels = c("Potential Customer", "Low Demand", "High Use"))+xlab("Cluster")+ylab("WTP")



ggplot(f_hist_na, aes(as.factor(clusterNum), y=sat, fill=as.factor(clusterNum)))+geom_bar(stat="identity")+scale_fill_discrete( name = "Cluster",  labels = c("Potential Customer", "Low Demand", "High Use"))+xlab("Cluster")+ylab("Satisfaction with Light")



ggplot(f_hist_na, aes(as.factor(clusterNum), y=watt, fill=as.factor(clusterNum)))+geom_bar(stat="identity")+scale_fill_discrete( name = "Cluster",  labels = c("Potential Customer", "Low Demand", "High Use"))+xlab("Cluster")+ylab("Total Appliance Wattage")





####HIGH USE MODELS###
##Model 1: village/socioecon

m1_h_na <- lm(high_use ~ q202_age + educat + hindu + scheduled  + house_size   + as.factor(q006_state_code), data=full_na)
m1_se_h_na <-coeftest(m1_h_na, cluster.vcov(m1_h_na, as.factor(full_na$q011_village_code)))

##Model 2: with psychology

m2_h_na <- lm(high_use ~ q202_age + educat + hindu + scheduled  + house_size  + leader + cheap+ risk_averse + as.factor(q006_state_code), data=full_na)
m2_se_h_na <-coeftest(m2_h_na, cluster.vcov(m2_h_na, as.factor(full_na$q011_village_code)))


##Model 3: Intervention
m3_h_na <- lm(high_use ~ q202_age + educat + hindu + scheduled + expenses  + house_size + vil_grid_hr + leader + cheap+ risk_averse + q301_grid_use + q335_mgrid_use  + as.factor(q006_state_code), data=full_na)
m3_se_h_na <-coeftest(m3_h_na, cluster.vcov(m3_h_na, as.factor(full_na$q011_village_code)))

############
##TABLE S6##
############

fit.list_h_na <- list(m1_h_na, m2_h_na, m3_h_na)
se.list_h_na <- list(m1_se_h_na[,2], m2_se_h_na[,2], m3_se_h_na[,2])
p.list_h_na <- list(m1_se_h_na[,4], m2_se_h_na[,4], m3_se_h_na[,4])
cov.list <- c("Age", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste/Tribe?", "Log Expenses", "Household Size", "Village Grid Hours", "Leader", "Cheap", "Risk Averse", "Grid Connection?", "Mini-Grid Connection?")
omit.stats <- c("rsq","ser","f")
dep.var.list <- c("High Use", "High Use", "High Use")

stargazer(title = "Determinants of Segment Membership",
          se = se.list_h_na,
          p = p.list_h_na,
          header = F,
          fit.list_h_na,
          covariate.labels = cov.list,
          dep.var.labels = "High Use",
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit = c("as.factor"),
          add.lines=list(c("State Fixed Effects?", "Yes", "Yes", "Yes")),
          notes = "Standard errors clustered at village level."
)


####LOW USE MODELS###
##Model 1: village/socioecon

m1_l_na <- lm(low_use ~ q202_age + educat + hindu + scheduled  + house_size   + as.factor(q006_state_code), data=full_na)
m1_se_l_na <-coeftest(m1_l_na, cluster.vcov(m1_l_na, as.factor(full_na$q011_village_code)))

##Model 2: with psychology

m2_l_na <- lm(low_use ~ q202_age + educat + hindu + scheduled  + house_size  + leader + cheap+ risk_averse + as.factor(q006_state_code), data=full_na)
m2_se_l_na <-coeftest(m2_l_na, cluster.vcov(m2_l_na, as.factor(full_na$q011_village_code)))


##Model 3: Intervention
m3_l_na <- lm(low_use ~ q202_age + educat + hindu + scheduled + expenses  + house_size + vil_grid_hr + leader + cheap+ risk_averse + q301_grid_use + q335_mgrid_use  + as.factor(q006_state_code), data=full_na)
m3_se_l_na <-coeftest(m3_l_na, cluster.vcov(m3_l_na, as.factor(full_na$q011_village_code)))

############
##TABLE S4##
############

fit.list_l_na <- list(m1_l_na, m2_l_na, m3_l_na)
se.list_l_na <- list(m1_se_l_na[,2], m2_se_l_na[,2], m3_se_l_na[,2])
p.list_l_na <- list(m1_se_l_na[,4], m2_se_l_na[,4], m3_se_l_na[,4])
cov.list <- c("Age", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste/Tribe?", "Log Expenses", "Household Size", "Village Grid Hours", "Leader", "Cheap", "Risk Averse", "Grid Connection?", "Mini-Grid Connection?")
omit.stats <- c("rsq","ser","f")
dep.var.list <- c("Potential Customer", "Potential Customer", "Potential Customer")

stargazer(title = "Determinants of Segment Membership",
          se = se.list_l_na,
          p = p.list_l_na,
          header = F,
          fit.list_l_na,
          covariate.labels = cov.list,
          dep.var.labels = "Potential Customer",
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit = c("as.factor"),
          add.lines=list(c("State Fixed Effects?", "Yes", "Yes", "Yes")),
          notes = "Standard errors clustered at village level."
)


####MED USE MODELS###
##Model 1: village/socioecon

m1_m_na <- lm(med_use ~ q202_age + educat + hindu + scheduled  + house_size   + as.factor(q006_state_code), data=full_na)
m1_se_m_na <-coeftest(m1_m_na, cluster.vcov(m1_m_na, as.factor(full_na$q011_village_code)))

##Model 2: with psychology

m2_m_na <- lm(med_use ~ q202_age + educat + hindu + scheduled  + house_size  + leader + cheap+ risk_averse + as.factor(q006_state_code), data=full_na)
m2_se_m_na <-coeftest(m2_m_na, cluster.vcov(m2_m_na, as.factor(full_na$q011_village_code)))


##Model 3: Intervention
m3_m_na <- lm(med_use ~ q202_age + educat + hindu + scheduled + expenses  + house_size + vil_grid_hr + leader + cheap+ risk_averse + q301_grid_use + q335_mgrid_use  + as.factor(q006_state_code), data=full_na)
m3_se_m_na <-coeftest(m3_m_na, cluster.vcov(m3_m_na, as.factor(full_na$q011_village_code)))

############
##TABLE S5##
############

fit.list_m_na <- list(m1_m_na, m2_m_na, m3_m_na)
se.list_m_na <- list(m1_se_m_na[,2], m2_se_m_na[,2], m3_se_m_na[,2])
p.list_m_na <- list(m1_se_m_na[,4], m2_se_m_na[,4], m3_se_m_na[,4])
cov.list <- c("Age", "Schooling?", "Higher Education?", "Hindu?", "Scheduled Caste/Tribe?", "Log Expenses", "Household Size", "Village Grid Hours", "Leader", "Cheap", "Risk Averse", "Grid Connection?", "Mini-Grid Connection?")
omit.stats <- c("rsq","ser","f")
dep.var.list <- c("Low Demand", "Low Demand", "Low Demand")

stargazer(title = "Determinants of Segment Membership",
          se = se.list_m_na,
          p = p.list_m_na,
          header = F,
          fit.list_m_na,
          covariate.labels = cov.list,
          dep.var.labels = "Low Demand",
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit = c("as.factor"),
          add.lines=list(c("State Fixed Effects?", "Yes", "Yes", "Yes")),
          notes = "Standard errors clustered at village level."
)



